# Clear memory
rm(list=ls())

# Load packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse, readxl, data.table, stringr, collapse, modelsummary, knitr, kableExtra, haven, usmap, tigris, ggpubr, fixest
)

### NEI

### SIC 87 to NAICS12 crosswalk. 

sic87_to_naics12 <- fread("raw/CBP/industry_crosswalk/efsy_sic87_to_naics12.csv") %>%
  rename(SIC = sic87) %>%
  mutate(SIC = str_remove_all(SIC, "-"))

sic87_to_naics12$SIC = str_replace(sic87_to_naics12$SIC,"\\\\","")

sic87_to_naics12 = sic87_to_naics12 %>%
  mutate(SIC = as.integer(SIC))


# Make NEI function
nei_data_function = function(selected_year) {
  file_to_import = paste0("raw/nei/", selected_year, "SicSummarymade09082005.txt")
  temp_data = fread(file_to_import)    %>%
    full_join(sic87_to_naics12) %>%
    mutate(first_two_naics_digit = as.integer(substr(as.character(naics12), 1, 2))) %>%
    filter(first_two_naics_digit >= 31 & first_two_naics_digit <= 33) %>%
    mutate(weighted_pollution = weight*ANNUAL_EMISSION) %>%
    rename(state = STATE_FIPS, county = COUNTY_FIPS) %>%
    collap( 
      by = ~ state + county +POLLUTANT_CODE, 
      custom = list(fsum = c("weighted_pollution"))) %>% 
    filter(POLLUTANT_CODE != "PM25-FIL") %>%
    filter(POLLUTANT_CODE != "PM-PRI") %>%
    filter(POLLUTANT_CODE != "PM-FIL") %>%
    filter(POLLUTANT_CODE != "PM10-FIL") %>%
    filter(POLLUTANT_CODE != "PM-CON") %>%
    mutate(POLLUTANT_CODE = 
             case_when(POLLUTANT_CODE %in% c("PM25-PRI") ~ "PM25", 
                       POLLUTANT_CODE %in% c("PM10-PRI") ~ "PM10",
                       TRUE ~ POLLUTANT_CODE)) %>%
    pivot_wider(id_cols = c(state, county), names_from = POLLUTANT_CODE, values_from = weighted_pollution) %>%
    mutate(year = selected_year)
  return(temp_data)
}


# Run function
nei1997 = nei_data_function(selected_year = 1997)

## Create emissions for 1997 in right order 


# Put zeros in where there are NAs  

nei1997 = nei1997 %>% 
  mutate(across(everything(), .fns = ~replace_na(.,0))) %>%
  mutate(str_state = str_pad(state, 2, "0", side = "left")) %>%
  mutate(str_county = str_pad(county, 3, "0", side = "left" )) %>%
  mutate(fips = paste0(str_state, str_county)) %>%
  mutate(fips = as.numeric(fips))


# Import fips code list 
fips_order = fread("raw/modified-ap3/fips.csv") 
fips_order = fips_order %>% 
  rename(fips = V1)

# Left join
emissions_for_model = fips_order %>%
  left_join(nei1997) %>% 
  mutate(across(c("CO",         "NH3",        "NOX" ,       "PM10" ,      "PM25" ,     "SO2" ,       "VOC")   , .fns = ~replace_na(.,0))) %>%
  select(NH3, NOX, PM25, SO2, VOC)

# Save file 
fwrite(emissions_for_model, file = "data/emissions_cf.csv", col.names = FALSE)


